{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !pip install selenium -i https://pypi.tuna.tsinghua.edu.cn/simple/\n",
    "\n",
    "import getpass\n",
    "import json\n",
    "import os\n",
    "import re\n",
    "import time\n",
    "\n",
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "from PIL import Image\n",
    "from requests.cookies import RequestsCookieJar\n",
    "from selenium import webdriver\n",
    "from selenium.webdriver.common.by import By\n",
    "from selenium.webdriver.support.ui import WebDriverWait"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Log in on the auth.xincheng.com login page with a headless Chrome. The later\n",
    "# cells reuse this `browser` session (and its cookies) to reach the course pages.\n",
    "login = \"https://auth.xincheng.com/?challengeNumber=cXozcGtoZ2RweDN3MGl5dDVka3Y3bWlnaHF5a3Z4endrbm92bXgyc2hpajdsdzVobjh5bjA5b3Z3cWZnNWJtZHZrejE5YW1zd2JsaGtnajBpZ2l4Y2QxeGVheGc4d3hsejZjanVhZnRhN2d5NWcxNzdiM255b2Rrbjl0cHdvZWl1dnprMzkwZTdlcmd0aDZjcDNkdTRpYjVqbXhheGhobGtqM3FpNzUzd3VtenA2ODNzdDZkZXdkdHo3dGt1d2Q1dHRkM2JmaWJwZXdvampwM2szeHlyNnZtMGVzb2xwcGJjcXhrcjJzdzN4NGRmYzBzZ2JmZjZvb2d6cnRwOHBqcQ==&flag=3&systemCode=A07&RetutnUrl=http%3A%2F%2Fekp.xincheng.com%2F\"\n",
    "\n",
    "# Credentials are read from the environment, falling back to an interactive prompt,\n",
    "# so that they are never stored in the notebook itself.\n",
    "username = os.environ.get(\"XINCHENG_USERNAME\") or input(\"Username: \")\n",
    "password = os.environ.get(\"XINCHENG_PASSWORD\") or getpass.getpass(\"Password: \")\n",
    "\n",
    "option = webdriver.ChromeOptions()\n",
    "option.add_argument('headless')  # run Chrome without a visible window\n",
    "# `options=` is the supported keyword; `chrome_options=` raises a DeprecationWarning.\n",
    "browser = webdriver.Chrome(options=option)\n",
    "browser.get(login)\n",
    "\n",
    "\n",
    "def _visible_username_field(driver):\n",
    "    \"\"\"Return the username input once it is displayed, else False so the wait retries.\"\"\"\n",
    "    field = driver.find_element(By.ID, \"txtUserName\")\n",
    "    return field if field.is_displayed() else False\n",
    "\n",
    "\n",
    "# Poll for the login form (up to 10 s) instead of sleeping for a fixed 2 s.\n",
    "username_field = WebDriverWait(browser, 10).until(_visible_username_field)\n",
    "username_field.click()\n",
    "time.sleep(0.2)  # short pauses between the form interactions, kept from the original flow\n",
    "username_field.send_keys(username)\n",
    "time.sleep(0.2)\n",
    "browser.find_element(By.ID, \"txtPassword\").send_keys(password)\n",
    "time.sleep(0.2)\n",
    "browser.find_element(By.ID, \"btnLogin\").click()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Open the course-package page in the logged-in browser session.\n",
    "# NOTE(review): presumably this visit establishes the session on\n",
    "# xcsxy.yunxuetang.cn before the cookies are read later - confirm.\n",
    "url_shangxueyuan = (\n",
    "    \"https://xcsxy.yunxuetang.cn/kng/view/package/\"\n",
    "    \"6e09ad5e95574a5aad515c3a83fb8ea2.html\"\n",
    ")\n",
    "browser.get(url_shangxueyuan)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "输入网站PPT地址：https://xcsxy.yunxuetang.cn/kng/course/package/document/205e59777ad349daad9682bf21dbd2cd_53adf876631e466f8436bcf04cc4d64d.html?m=1&uniqueid=1626683488502\n"
     ]
    }
   ],
   "source": [
    "# Document pages of the course. No other cell in this notebook reads `urlList`;\n",
    "# it appears to be kept as a reference of URLs to paste into the prompt below.\n",
    "urlList = ['https://xcsxy.yunxuetang.cn/kng/course/package/document/2e8a2de3e64f4ce4ac5444d482462a2b_c8b2a85e861e44829d752b2a06f6c303.html?uniqueid=1625728718605',\n",
    "          'https://xcsxy.yunxuetang.cn/kng/course/package/document/42ca5a97f12f4a0da62c31a9103f69de_d907e793ee5e40a0a82e5007becbb916.html?uniqueid=1625730556716',\n",
    "          'https://xcsxy.yunxuetang.cn/kng/course/package/document/52521b2863fe44e1b2fff3e583243c32_7dab8431b9f744a1841e24a806582814.html?uniqueid=1625730569881&uniqueid=637613561699300510',\n",
    "          'https://xcsxy.yunxuetang.cn/kng/course/package/document/5f9d7aa5ed7a412d8ef7bf738de2ed37_4ab0b13c1d9e4ddb907bf6bec9ebb69a.html?uniqueid=1625730590141&uniqueid=637613561901863105',\n",
    "          'https://xcsxy.yunxuetang.cn/kng/course/package/document/e9740f9c8ab844e3881c9492c8a6df9b_f3be50b926c04d85b53cefe7d6e98ef9.html?uniqueid=1625731256231&uniqueid=637613568562744355',\n",
    "          'https://xcsxy.yunxuetang.cn/kng/course/package/document/9bbdef3135c640c8a7f81e35a777b0b6_563b075f4264443d8d17f5e229eadebd.html?uniqueid=1625731273327&uniqueid=637613568733676320',\n",
    "          'https://xcsxy.yunxuetang.cn/kng/course/package/document/37d0d12dfa50452a95b5a72051d0f61a_b72766c701af469b885a6a51c3181ed7.html?uniqueid=1625731285232',\n",
    "          'https://xcsxy.yunxuetang.cn/kng/course/package/document/9522abe50173453aa2ceb1459b923807_9a5868e2f75e4ef984615e269c8f66b1.html?uniqueid=1625731815654&uniqueid=637613574156885622',\n",
    "          'https://xcsxy.yunxuetang.cn/kng/course/package/document/15de7c5ed766443f8db585b62b1323f5_51da3e01e5d24e6bbf0129876ab1ed17.html?uniqueid=1625731888943',\n",
    "          'https://xcsxy.yunxuetang.cn/kng/course/package/document/812c43d5e3d440f9bf2c50d3570cbfc5_c9dba736a9f7406b8f690a1b793350c7.html?uniqueid=1625731992154&uniqueid=637613575921857879',\n",
    "          'https://xcsxy.yunxuetang.cn/kng/course/package/document/4cd248bbe7e44ef598a75739838d7c43_bd68a965230a421ca0c9faece4ad6085.html?uniqueid=1625732056306&uniqueid=637613576563477640',\n",
    "          'https://xcsxy.yunxuetang.cn/kng/course/package/document/e44c7a941f8544f89f137261e6bcad36_94b411fc0fa84f0eaf1e0250002740d7.html?uniqueid=1625732144496&uniqueid=637613577445562256',\n",
    "          'https://xcsxy.yunxuetang.cn/kng/course/package/document/94c03f718191431990add17147c220bc_3fb14bc88cf240938a2d5bfff174edf0.html?uniqueid=1625731347091',\n",
    "          ]\n",
    "\n",
    "# Ask for the document page to export. Whitespace picked up by copy/paste is\n",
    "# dropped, and an empty answer is rejected here instead of failing later in\n",
    "# browser.get().\n",
    "url = input(\"输入网站PPT地址：\").strip()\n",
    "if not url:\n",
    "    raise ValueError(\"No document URL was entered.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export one course document as a PDF: fetch the page with the logged-in session,\n",
    "# download every slide image it references and merge the images into one PDF.\n",
    "\n",
    "# Scratch folder for the downloaded slide images and target folder for the PDF.\n",
    "# One constant is used for writing, reading and deleting the slides, so the cell\n",
    "# no longer depends on the current working directory.\n",
    "SLIDE_DIR = \"C:\\\\jupyter\\\\爬虫学习\\\\elearning_ppt\"\n",
    "PDF_DIR = \"C:\\\\SAP 课件\"\n",
    "os.makedirs(SLIDE_DIR, exist_ok=True)\n",
    "os.makedirs(PDF_DIR, exist_ok=True)\n",
    "\n",
    "browser.get(url)\n",
    "\n",
    "# Hand the browser's login cookies to requests in memory; the session cookies are\n",
    "# no longer written to a file on disk.\n",
    "jar = RequestsCookieJar()\n",
    "for cookie in browser.get_cookies():\n",
    "    jar.set(cookie['name'], cookie['value'])\n",
    "\n",
    "s = requests.session()\n",
    "s.headers = {\n",
    "        \"User-Agent\": \"Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1\"\n",
    "    }\n",
    "\n",
    "r = s.get(url, cookies=jar)\n",
    "r.raise_for_status()\n",
    "\n",
    "soup = BeautifulSoup(r.text, \"lxml\")\n",
    "Div1 = soup.find(id=\"Div1\")\n",
    "title_tag = soup.find(id=\"lblTitle\")\n",
    "if Div1 is None or title_tag is None:\n",
    "    raise RuntimeError(\n",
    "        \"Could not find 'Div1' / 'lblTitle' in the page; check the login and the document URL.\"\n",
    "    )\n",
    "\n",
    "# The title becomes a file name, so characters Windows forbids in names are replaced.\n",
    "title = re.sub(r'[\\\\/:*?\"<>|\\r\\n\\t]', \"_\", title_tag.text.strip()) or \"untitled\"\n",
    "\n",
    "# Slide images are embedded in Div1 as quoted https://picebd.yunxuetang.cn/... URLs.\n",
    "# Raw string with escaped dots: '\\S' in a normal string literal is an invalid escape.\n",
    "regexp = re.compile(r'(https://picebd\\.yunxuetang\\.cn\\S*?)\"')\n",
    "ppt_content = regexp.findall(str(Div1))\n",
    "if not ppt_content:\n",
    "    raise RuntimeError(\"No slide image URLs were found in the page.\")\n",
    "\n",
    "slide_paths = []   # files written by this run, in page order\n",
    "slide_images = []  # PIL images opened for the PDF\n",
    "try:\n",
    "    for page_no, ppt_html in enumerate(ppt_content):\n",
    "        resp_ppt = s.get(ppt_html)\n",
    "        resp_ppt.raise_for_status()\n",
    "        slide_path = os.path.join(SLIDE_DIR, f\"{page_no}.jpg\")\n",
    "        slide_paths.append(slide_path)\n",
    "        with open(slide_path, \"wb\") as f:\n",
    "            f.write(resp_ppt.content)\n",
    "\n",
    "    # Only the files downloaded above are used, already in page order, so stale or\n",
    "    # non-numeric files in the folder can neither break the run nor end up in the PDF.\n",
    "    for slide_path in slide_paths:\n",
    "        slide_images.append(Image.open(slide_path))\n",
    "    slide_images[0].save(\n",
    "        os.path.join(PDF_DIR, f\"{title}.pdf\"),\n",
    "        \"PDF\",\n",
    "        resolution=100.0,\n",
    "        save_all=True,\n",
    "        append_images=slide_images[1:],\n",
    "    )\n",
    "finally:\n",
    "    # Close the images before deleting: Windows refuses to remove a file that is still open.\n",
    "    for slide_image in slide_images:\n",
    "        slide_image.close()\n",
    "    for slide_path in slide_paths:\n",
    "        if os.path.exists(slide_path):\n",
    "            os.remove(slide_path)\n",
    "\n",
    "browser.quit()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
